# Read the combined sensor export, tidy the column names, and keep only the
# fields used by the plots below.
jj <- here("data", "sensors", "sensor-data_all.csv") %>%
  read_csv() %>%
  clean_names() %>%
  mutate(
    date_time = mdy_hms(date_time), # parse the date/time text with lubridate
    date = format(date_time, "%m/%d/%Y"), # date-only text column
    time = format(date_time, "%H:%M:%S") # time-only text column
  ) %>%
  select(site, sensor_number, date_time, date, time, temp_c, p_h) %>%
  # swap the three-letter site codes for full location names
  mutate(
    site = case_when(
      site == "LOL" ~ "Lompoc Landing",
      site == "ALG" ~ "Alegria",
      site == "BML" ~ "Bodega Bay",
      TRUE ~ site
    )
  )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Site = col_character(),
## `Sensor number` = col_double(),
## `Download date` = col_character(),
## `Calibration date` = col_character(),
## `Temp_(C)` = col_double(),
## `Voltage#1` = col_double(),
## TK = col_double(),
## `S(T)` = col_double(),
## `Eo(T)` = col_double(),
## pH = col_double(),
## `Date time` = col_character()
## )
# Fix the site order so the plot legends list the sites in this order.
jj$site <- factor(jj$site, levels = c("Alegria", "Lompoc Landing", "Bodega Bay"))

# pH time series for every site, one coloured line (plus points) per site.
ggplot(jj, aes(x = date_time, y = p_h, group = site)) +
  geom_line(aes(color = site), size = 0.7) +
  geom_point(aes(color = site), size = 0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "june-aug.png"), height = 20, width = 40, units = "cm")
# Temperature time series for every site, one coloured line (plus points) per site.
ggplot(jj, aes(x = date_time, y = temp_c, group = site)) +
  geom_line(aes(color = site), size = 0.7) +
  geom_point(aes(color = site), size = 0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "june-aug_temp.png"), height = 20, width = 40, units = "cm")
# Sensor readings from the Bodega Bay site only.
bml <- jj %>%
  filter(site == "Bodega Bay")

# pH (red) and temperature (blue) drawn against the same y scale.
ggplot(bml, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") + # plotted unscaled, on the same axis as pH
  scale_y_continuous(
    name = "pH", # left axis title
    # identity transform: the right axis shows the same values as the left one
    sec.axis = sec_axis(~., name = "Temp (C)")
  ) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
## Try it with pH and temp sorted as "groups"
# Reshape to long format: one row per timestamp and variable (temp_c or p_h).
bml2 <- bml %>%
  pivot_longer(
    cols = temp_c:p_h,
    names_to = "group",
    values_to = "value"
  )

# One coloured line per variable.
ggplot(bml2, aes(x = date_time, y = value, group = group)) +
  geom_line(aes(color = group), size = 0.7) +
  #geom_point(aes(color=group), size=0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
Let’s come back to this…
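Sites: chosen with the `site` URL parameter (e.g. "Bodega Harbor entrance, California")
Glen: number of days to extract (the `glen` URL parameter)
Interval: time between tide predictions (the `interval` URL parameter, e.g. 00:15 for every 15 minutes)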
# Scrape tide predictions for Bodega Harbor entrance (15-minute interval,
# heights in feet, per the URL parameters) and tidy them into a two-column
# table: date_time (POSIXct) and tide (numeric).
bml_tide <- read_html("http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=83;units=feet;year=2021;month=05;day=31;hour=09;min=30;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Bodega%20Harbor%20entrance%2C%20California") %>%
  html_elements("pre") %>% # the date, time, and tide values sit in the page's <pre> element(s)
  html_text2() %>% # extract the element text
  data.frame() %>% # one-column data frame; the column is named "."
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text into one string per time point
  unnest(date_tide) %>% # one row per time point
  # split each row on single whitespace characters into its fields
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep = "\\s") %>%
  select(-"space", -".") %>% # drop the blank filler column and the raw text column
  unite("time", "time", "time_zone", sep = " ") %>% # join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # join date and time/time zone
  # remove the incomplete final row that separate() pads with NA
  # (presumably an empty string after the last newline -- TODO confirm)
  drop_na() %>%
  mutate(
    date_time = ymd_hm(date_time), # parse with lubridate
    tide = as.numeric(tide) # character -> numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [7969].
# Predicted tide height at Bodega Harbor entrance over the deployment.
ggplot(bml_tide, aes(x = date_time, y = tide)) +
  geom_line(size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("2 days"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "bml_tide.png"), height = 20, width = 40, units = "cm")
# Combine Bodega Bay sensor readings with the tide predictions. The join key
# is stated explicitly so the result does not depend on whichever column
# names the two tables happen to share.
bml_all <- full_join(bml, bml_tide, by = "date_time")

# Long format (temp_c, p_h, tide) for a single multi-line plot.
bml3 <- bml_all %>%
  # sensor sampling started at 10-minute intervals and later switched to
  # 15 minutes, so keep only timestamps that have both a pH and a tide value
  drop_na(c(tide, p_h)) %>%
  pivot_longer(
    cols = temp_c:tide,
    names_to = "data",
    values_to = "value"
  )

ggplot(bml3, aes(x = date_time, y = value, group = data)) +
  geom_line(aes(color = data), size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "bml_pH_temp_tide.png"), height = 20, width = 40, units = "cm")
# Keep only observations taken while the predicted tide was above -0.5.
bml_detide <- bml_all %>%
  filter(tide > -0.5) %>%
  drop_na(c(p_h)) # drop observations that don't overlap (10 min vs 15 min sampling interval)

# Tide (red) and temperature (blue), both plotted unscaled on one axis.
ggplot(bml_detide, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
# Remove temperature anomalies: rows whose temperature jumps by a degree or
# more relative to a neighbouring reading.
bml_detide_temp <- bml_detide %>%
  arrange(date_time) %>% # differences below assume chronological order
  # change from the previous reading; the first row has no predecessor, so it
  # is compared with itself (diff = 0)
  mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>%
  filter(diff > -1.0) %>% # remove observations with large temperature drops
  # change relative to the next reading; the last row has no successor, so it
  # is compared with itself (diff2 = 0) rather than with an unrelated reading
  mutate(diff2 = temp_c - lead(temp_c, default = last(temp_c))) %>%
  filter(
    diff2 < 1,
    diff2 > -1
  )

# pH (red) and temperature (blue), both plotted unscaled on one axis.
ggplot(bml_detide_temp, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
# Remove pH anomalies: rows whose pH changes by 0.5 or more from the previous
# reading.
bml_detide_temp_ph <- bml_detide_temp %>%
  arrange(date_time) %>% # differences below assume chronological order
  # change from the previous reading (first row compared with itself)
  mutate(diff3 = p_h - lag(p_h, default = first(p_h))) %>%
  filter(
    diff3 > -0.5,
    diff3 < 0.5
  ) # remove weird readings on 7/10

# pH, temperature, and tide, all plotted unscaled on one shared axis; the
# coloured text annotations act as the legend.
ggplot(bml_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "#009E73") +
  geom_line(aes(y = temp_c), color = "#D55E00") +
  geom_line(aes(y = tide), color = "#0072B2") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  # tz = "UTC" keeps label placement independent of the machine's time zone
  # (lubridate parsers return UTC by default)
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 16, hjust = 0, label = "Temp (C)", color = "#D55E00") +
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 15, hjust = -0.1, label = "pH", color = "#009E73") +
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 14, hjust = 0, label = "Tide", color = "#0072B2") +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "bml_detide_tide-temp-pH.png"), height = 20, width = 40, units = "cm")
# Scrape tide predictions for Point Arguello (15-minute interval, heights in
# feet, per the URL parameters) and tidy them into a two-column table:
# date_time (POSIXct) and tide (numeric).
lol_tide <- read_html("http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=66;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Point%20Arguello%2C%20California") %>%
  html_elements("pre") %>% # the date, time, and tide values sit in the page's <pre> element(s)
  html_text2() %>% # extract the element text
  data.frame() %>% # one-column data frame; the column is named "."
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text into one string per time point
  unnest(date_tide) %>% # one row per time point
  # split each row on single whitespace characters into its fields
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep = "\\s") %>%
  select(-"space", -".") %>% # drop the blank filler column and the raw text column
  unite("time", "time", "time_zone", sep = " ") %>% # join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # join date and time/time zone
  # remove the incomplete final row that separate() pads with NA
  # (presumably an empty string after the last newline -- TODO confirm)
  drop_na() %>%
  mutate(
    date_time = ymd_hm(date_time), # parse with lubridate
    tide = as.numeric(tide) # character -> numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [6337].
# Predicted tide height at Point Arguello over the deployment.
ggplot(lol_tide, aes(x = date_time, y = tide)) +
  geom_line(size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("2 days"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "lol_tide.png"), height = 20, width = 40, units = "cm")
# filter all data for LOL
lol <- jj %>%
  filter(site == "Lompoc Landing")

# Combine the sensor readings with the tide predictions (explicit join key)
# and keep only rows before 2021-08-19 02:30:00.
lol_all <- full_join(lol, lol_tide, by = "date_time") %>%
  filter(date_time < ymd_hms("2021-08-19 02:30:00"))

# Long format (temp_c, p_h, tide) for a single multi-line plot.
lol_plot <- lol_all %>%
  pivot_longer(
    cols = temp_c:tide,
    names_to = "data",
    values_to = "value"
  )

ggplot(lol_plot, aes(x = date_time, y = value, group = data)) +
  geom_line(aes(color = data), size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "lol_pH_temp_tide.png"), height = 20, width = 40, units = "cm")
# Keep only observations taken while the predicted tide was above 0.5.
lol_detide <- lol_all %>%
  filter(tide > 0.5)

# Tide (red) and temperature (blue), both plotted unscaled on one axis.
ggplot(lol_detide, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
# Subset of the de-tided Lompoc Landing data (before 2021-06-30 23:00:00)
# with the temperature change to the previous and next reading, used to look
# for anomalies.
lol_check <- lol_detide %>%
  filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
  arrange(date_time) %>% # differences below assume chronological order
  mutate(
    # change from the previous reading (first row compared with itself -> 0)
    diff = temp_c - lag(temp_c, default = first(temp_c)),
    # change relative to the next reading; the last row has no successor, so
    # it is compared with itself (-> 0) rather than with the first reading
    diff2 = temp_c - lead(temp_c, default = last(temp_c))
  )
#filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)
#Thank you stas g (https://stats.stackexchange.com/a/164830) for this function
# Find the positions of local maxima in a numeric vector.
#
# A position p is a candidate when the series rises into p and falls out of
# it (the sign of the first difference drops). A candidate is kept when no
# value within the surrounding window is larger than x[p].
#
# Arguments:
#   x  numeric vector to search.
#   m  window half-width: larger values demand that a peak dominate more
#      neighbours, so fewer peaks are returned.
#
# Returns a numeric vector of positions in x (numeric(0) when there are no
# peaks). Missing values inside a candidate's window are ignored when
# comparing against the candidate.
find_peaks <- function(x, m = 3) {
  n <- length(x)
  # negative entries mark positions where the slope sign drops
  shape <- diff(sign(diff(x)))
  candidates <- which(shape < 0)
  is_peak <- vapply(candidates, function(i) {
    # window bounds, clamped to the ends of the vector
    z <- max(i - m + 1, 1)
    w <- min(i + m + 1, n)
    # the candidate itself sits at i + 1; compare it with everything else
    all(x[c(z:i, (i + 2):w)] <= x[i + 1], na.rm = TRUE)
  }, logical(1))
  candidates[is_peak] + 1
}
# Row positions of local temperature maxima in lol_check.
pk <- find_peaks(lol_check$temp_c, m = 50) #set a high threshold bc we need it...

# Flag the peak rows. lol_check is reused directly because pk holds row
# positions within that exact table.
lol_check_peak <- lol_check %>%
  mutate(peak = as.numeric(row_number() %in% pk)) # 1 if find_peaks flagged this row, otherwise 0

# Tide (red) and temperature (blue) with the detected peaks highlighted.
ggplot(lol_check_peak, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  #geom_line(aes(y=diff), color="green") +
  #geom_line(aes(y=diff2), color="orange") +
  # peaks are drawn as large red points; all other points get size 0
  geom_point(aes(
    y = temp_c,
    color = ifelse(peak > 0, "red", "black"),
    size = ifelse(peak > 0, 2, 0)
  )) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  # use the colour and size values computed above literally
  scale_color_identity() +
  scale_size_identity() +
  # vertical line at every peak
  geom_vline(aes(xintercept = date_time), data = lol_check_peak %>% filter(peak == 1)) +
  # y-axis breaks every 0.5 across the observed tide range
  scale_y_continuous(breaks = round(seq(min(lol_check_peak$tide), max(lol_check_peak$tide), by = 0.5), 1))
It looks like the sensor is definitely disconnected from the ocean whenever the tide height is below 1.5 ft.
# Keep observations with tide above 1.5 and compute the pH change from the
# previous reading.
lol_detide_temp_ph <- lol_all %>%
  filter(tide > 1.5) %>%
  arrange(date_time) %>% # differences below assume chronological order
  mutate(diff3 = p_h - lag(p_h, default = first(p_h))) #doesn't look like pH values jump extraordinarily

# pH (blue) and its step-to-step change (green).
ggplot(lol_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "blue") +
  geom_line(aes(y = diff3), color = "green") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
# pH, temperature, and tide, all plotted unscaled on one shared axis; the
# coloured text annotations act as the legend.
ggplot(lol_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "#009E73") +
  geom_line(aes(y = temp_c), color = "#D55E00") +
  geom_line(aes(y = tide), color = "#0072B2") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M is minutes; lower-case %m would print the month in the minutes slot
    labels = scales::date_format("%m/%d %H:%M")
  ) +
  # tz = "UTC" keeps label placement independent of the machine's time zone
  # (lubridate parsers return UTC by default)
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 20, hjust = 0, label = "Temp (C)", color = "#D55E00") +
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 19, hjust = -0.1, label = "pH", color = "#009E73") +
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 18, hjust = 0, label = "Tide", color = "#0072B2") +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Write the most recent plot to figures/sensors/.
ggsave(here("figures", "sensors", "lol_detide_tide-temp.png"), height = 20, width = 40, units = "cm")
# Scrape tide predictions for Gaviota (15-minute interval, heights in feet,
# per the URL parameters) and tidy them into a two-column table:
# date_time (POSIXct) and tide (numeric).
alg_tide <- read_html("http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=66;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Gaviota%2C%20California") %>%
  html_elements("pre") %>% # the date, time, and tide values sit in the page's <pre> element(s)
  html_text2() %>% # extract the element text
  data.frame() %>% # one-column data frame; the column is named "."
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text into one string per time point
  unnest(date_tide) %>% # one row per time point
  # split each row on single whitespace characters into its fields
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep = "\\s") %>%
  select(-"space", -".") %>% # drop the blank filler column and the raw text column
  unite("time", "time", "time_zone", sep = " ") %>% # join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # join date and time/time zone
  # remove the incomplete final row that separate() pads with NA
  # (presumably an empty string after the last newline -- TODO confirm)
  drop_na() %>%
  mutate(
    date_time = ymd_hm(date_time), # parse with lubridate
    tide = as.numeric(tide) # character -> numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [6337].